import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Downloads a web page, echoes its source to standard output and then prints
 * every {@code <head>...</head>} section found in that source.
 *
 * @author Lance Godfrey
 */
public class SourceScraper {

	/** Page fetched when no address is supplied on the command line. */
	private static final String DEFAULT_PAGE = "http://www.w3schools.com/html/lastpage.htm";

	/**
	 * Matches one {@code <head>...</head>} section. The quantifier is reluctant
	 * so that each section is reported on its own instead of one match running
	 * from the first opening tag to the last closing tag; DOTALL lets the
	 * section span several lines, and the match ignores case because HTML tag
	 * names are case-insensitive.
	 */
	private static final Pattern HEAD_SECTION =
			Pattern.compile("<head>.*?</head>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

	/**
	 * Prints the source of a page followed by its header section(s).
	 *
	 * <p>To run behind an HTTP proxy (for example at Massey), start the JVM with
	 * {@code -Dhttp.proxyHost=tur-cache.massey.ac.nz -Dhttp.proxyPort=8080}.
	 *
	 * @param args optional; {@code args[0]} is the URL of the page to read. When
	 *             absent, the built-in default page is used.
	 * @throws IOException if the URL is malformed or the page cannot be read
	 */
	public static void main(String[] args) throws IOException {
		String address = (args != null && args.length > 0) ? args[0] : DEFAULT_PAGE;
		String pageSource = echoAndCollect(new URL(address));

		System.out.println("\nHere is the page's header:\n");
		Matcher heads = HEAD_SECTION.matcher(pageSource);
		while (heads.find()) {
			System.out.println(heads.group());
		}
	}

	/**
	 * Reads the page line by line, printing each line as it arrives, and returns
	 * the complete text with lines separated by {@code '\n'}.
	 *
	 * @param page location of the page to read
	 * @return the page text; empty if the page has no content
	 * @throws IOException if the connection cannot be opened or reading fails
	 */
	private static String echoAndCollect(URL page) throws IOException {
		URLConnection connection = page.openConnection();
		StringBuilder collected = new StringBuilder();
		// try-with-resources closes the stream even when reading fails part-way.
		// The text is decoded as UTF-8 instead of the platform default so the
		// result does not depend on the machine; the page's own declared
		// charset is not inspected.
		try (BufferedReader in = new BufferedReader(
				new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
			System.out.println("Here is the page's source:\n");
			String line;
			while ((line = in.readLine()) != null) {
				System.out.println(line);
				// readLine() strips the terminator; put one back so that text on
				// adjacent lines is not fused together.
				collected.append(line).append('\n');
			}
		}
		return collected.toString();
	}
}
